/*
 * LearningAPIMain.java
 *
 * Yaoyong Li 22/03/2007
 *
 * $Id: LearningAPIMain.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
 */
package gate.learning;

import gate.Document;
import gate.Factory;
import gate.ProcessingResource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.Benchmark;
import gate.util.Benchmarkable;
import gate.util.BomStrippingInputStreamReader;
import gate.util.Files;
import gate.util.GateException;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Date;
import java.util.HashMap;

import org.apache.log4j.Logger;

/**
 * The main object of the ML API. It performs initialization, reads parameter
 * values from the GUI, and runs the selected learning mode. It can also be
 * called from Java code, as an API (a GATE class), to use this learning API.
 */
@SuppressWarnings("serial")
public class LearningAPIMain extends AbstractLanguageAnalyser
        implements
        ProcessingResource,
        Benchmarkable {

    /**
     * URL of the configuration file. Its parent directory is used as the
     * working directory, where the learned model(s) are saved.
     */
    private URL configFileURL;
    /**
     * Name of the AnnotationSet that contains the annotations specified in the
     * DATASET element of the configuration file.
     */
    private String inputASName;
    /**
     * Name of the AnnotationSet receiving the annotations produced by applying
     * the learned models.
     */
    private String outputASName;
    /**
     * Run-time parameter learningMode, selecting among modes such as training,
     * application (classification), and evaluation.
     */
    private RunMode learningMode;
    // Cached RunMode constants, initialised in init() and compared against
    // learningMode in execute().
    private RunMode learningModeAppl;
    private RunMode learningModeMiTraining;
    private RunMode learningModeVIEWSVMMODEL;
    private RunMode learningModeSelectingDocs;
    /** Learning settings specified in the configuration file. */
    private LearningEngineSettings learningSettings;
    /**
     * The lightweight learning object for getting the features, training and
     * application.
     */
    LightWeightLearningApi lightWeightApi = null;
    /** The file for the NLP learning log. */
    private File logFile;
    /** Used by lightWeightApi, specifying training or application. */
    private boolean isTraining;
    /** Sub-directory for storing the data files produced by the learning API. */
    private File workingDirectoryResults = null;
    /** Sub-directory used to store temporary files used by APPLICATION mode. */
    private File applicationTempDir;
    /** Helper used for evaluation mode. */
    private EvaluationBasedOnDocs evaluation;
    /** The MI learning information object. */
    MiLearningInformation miLearningInfor = null;
    // The three counters for batch application: documents with index in
    // [startDocIdApp, endDocIdApp) are applied together, at most maxNumApp
    // documents per batch.
    int startDocIdApp;
    int endDocIdApp;
    int maxNumApp;
    /** Trivial constructor. */
    public LearningAPIMain() {
        // do nothing
    }
    /**
     * Feature map used for exporting benchmarking log messages. Keys are
     * feature names; values may be strings, numbers or enum constants.
     * Parameterized (rather than raw) to avoid unchecked-operation warnings.
     */
    protected java.util.Map<String, Object> benchmarkingFeatures =
            new HashMap<String, Object>();

    /** Initialize this resource, and return it. */
    /**
     * Initialise this resource: validate the configuration file URL, load the
     * learning settings, create the results sub-directory and log file, and set
     * up the lightweight learning API.
     *
     * @return this resource, fully initialised
     * @throws ResourceInstantiationException if configFileURL is null, is not a
     *           file: URL, does not point to an existing file inside a
     *           directory, or if any later initialisation step fails
     */
    @Override
    public gate.Resource init() throws ResourceInstantiationException {
        fireStatusChanged("Checking and reading learning settings!");
        // The configuration file URL is a required init-time parameter; its
        // parent directory is also where the learned model is stored.
        if (configFileURL == null) {
            throw new ResourceInstantiationException(
                    "configFileURL is required to locate the configuration file"
                    + " (and the directory storing the learned model) and cannot be null");
        }
        // It must be a file: URL so a local working directory can be derived.
        if (!"file".equals(configFileURL.getProtocol())) {
            throw new ResourceInstantiationException(
                    "configFileURL must be a file: URL");
        }
        // Resolve the configuration file once; fall back to GATE's own
        // URL-to-File conversion if the URL is not a valid URI.
        File configFile;
        try {
            configFile = new File(configFileURL.toURI());
        } catch (URISyntaxException use) {
            configFile = Files.fileFromURL(configFileURL);
        }
        // The working directory is the directory the configuration file
        // resides in; it must exist and be a directory.
        File workingDirectory = configFile.getParentFile();
        if (!workingDirectory.isDirectory()) {
            throw new ResourceInstantiationException(workingDirectory
                    + " must be a reference to directory");
        }
        if (LogService.maxVerbosityLevel > 0) {
            System.out.println("Configuration File=" + configFileURL.toString());
        }
        if (!configFile.exists()) {
            throw new ResourceInstantiationException(
                    "Error: the configuration file specified does not exist!!");
        }
        miLearningInfor = new MiLearningInformation();
        try {
            // Load the learning settings by parsing the configuration file.
            learningSettings =
                    LearningEngineSettings.loadLearningSettingsFromFile(configFileURL);
        } catch (Exception e) {
            throw new ResourceInstantiationException(e);
        }
        try {
            if (LogService.maxVerbosityLevel > 0) {
                System.out.println("\n\n*************************");
                System.out.println("A new session for NLP learning is starting.\n");
            }
            // Create the sub-directory of the working directory where the data
            // files (features, models, logs) will be stored; fail loudly if it
            // cannot be created (the original silently ignored mkdir failure).
            workingDirectoryResults =
                    new File(workingDirectory, ConstantParameters.SUBDIRFORRESULTS);
            if (!workingDirectoryResults.exists()
                    && !workingDirectoryResults.mkdir()) {
                throw new IOException("Cannot create the results directory "
                        + workingDirectoryResults.getAbsolutePath());
            }
            logFile = new File(workingDirectoryResults,
                    ConstantParameters.FILENAMEOFLOGFILE);
            LogService.init(logFile, true, learningSettings.verbosityLogService);
            StringBuilder logMessage = new StringBuilder();
            logMessage.append("\n\n*************************\n");
            logMessage.append("A new session for NLP learning is starting.\n");
            // Record the working directory in the benchmarking features.
            benchmarkingFeatures.put("workingDirectory", workingDirectory.getAbsolutePath());
            logMessage.append("The initiliased time of NLP learning: "
                    + new Date().toString() + "\n");
            logMessage.append("Working directory: " + workingDirectory.getAbsolutePath() + "\n");
            logMessage.append("The feature files and models are saved at: "
                    + workingDirectoryResults.getAbsolutePath() + "\n");
            // Create the lightweight learning API and complete its setup.
            lightWeightApi = new LightWeightLearningApi(workingDirectory);
            lightWeightApi.furtherInit(workingDirectoryResults, learningSettings);
            logMessage.append("Learner name: "
                    + learningSettings.learnerSettings.getLearnerName() + "\n");
            logMessage.append("Learner nick name: "
                    + learningSettings.learnerSettings.getLearnerNickName() + "\n");
            logMessage.append("Learner parameter settings: "
                    + learningSettings.learnerSettings.implementationName + "\n");
            logMessage.append("Surroud mode (or chunk learning): "
                    + learningSettings.surround);
            LogService.logMessage(logMessage.toString(), 1);
        } catch (Exception e) {
            throw new ResourceInstantiationException(e);
        }
        // Pre-compute the run-mode constants compared against at execute() time.
        learningModeAppl = RunMode.CLASSIFICATION;
        maxNumApp = learningSettings.docIntervalForClassification;
        learningModeMiTraining = RunMode.MITRAINING;
        learningModeVIEWSVMMODEL = RunMode.VIEWPRIMALFORMMODELS;
        learningModeSelectingDocs = RunMode.RankingDocsForAL;
        fireProcessFinished();
        return this;
    } // init()

    /**
     * Run the resource.
     *
     * @throws ExecutionException
     */
    @Override
    public void execute() throws ExecutionException {
        // mode in which the PR is executed
        benchmarkingFeatures.put("learningMode", learningMode);
        if (learningMode.equals(learningModeVIEWSVMMODEL)) {
            if (corpus == null || corpus.size() == 0 || corpus.indexOf(document) == 0) {
                lightWeightApi.viewSVMmodelsInNLPFeatures(new File(workingDirectoryResults,
                        ConstantParameters.FILENAME_OF_MODEL), learningSettings);
            }
            return;
        }
        if (learningMode.equals(learningModeSelectingDocs)) {
            // for ordering and selecting the documents for ative learning
            if (corpus == null || corpus.size() == 0 || corpus.indexOf(document) == 0) {
                // ranking the documents
                lightWeightApi.orderDocsWithModels(workingDirectoryResults, learningSettings);
                // selecting the document
                // lightWeightApi.selectDocForAL()
            }
            return;
        }
        // now we need to see if the corpus is provided
        if (corpus == null) {
            throw new ExecutionException("Provided corpus is null!");
        }
        if (corpus.size() == 0) {
            throw new ExecutionException("No Document found in corpus!");
        }
        // set benchmark ID on the lightWeightApi
        String oldLightWeightApiParentId = null;
        if (lightWeightApi instanceof Benchmarkable) {
            oldLightWeightApiParentId = lightWeightApi.getParentBenchmarkId();
            lightWeightApi.createBenchmarkId(getBenchmarkId());
        }
        // first, get the NLP features from the documents, according to the
        // feature types specified in DataSetDefinition file
        int positionDoc = corpus.indexOf(document);
        // first document in the corpus
        if (positionDoc == 0) {
            lightWeightApi.inputASName = inputASName;
            lightWeightApi.outputASName = outputASName;
            /** Obtain the MI learning information of the last time learning. */
            if (learningMode.equals(this.learningModeMiTraining)) {
                miLearningInfor = new MiLearningInformation();
                File miLeFile =
                        new File(workingDirectoryResults,
                        ConstantParameters.FILENAMEOFMILearningInfor);
                long startTime = Benchmark.startPoint();
                benchmarkingFeatures.put("miLearningInformationFile", miLeFile.getAbsolutePath());
                miLearningInfor.readDataFromFile(miLeFile);
                Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                        + Benchmark.READING_LEARNING_INFO, this, benchmarkingFeatures);
                benchmarkingFeatures.remove("miLearningInformationFile");
            }
            /** Set the information for batch application. */
            startDocIdApp = 0;
            endDocIdApp = 0;
            if (LogService.maxVerbosityLevel > 0) {
                System.out.println("Pre-processing the " + corpus.size()
                        + " documents...");
            }
            try {
                LogService.init(logFile, true, learningSettings.verbosityLogService);
                LogService.logMessage("\n*** A new run starts.", 1);
                LogService.logMessage(
                        "\nThe execution time (pre-processing the first document): "
                        + new Date().toString(), 1);
                if (LogService.maxVerbosityLevel > 0) {
                    System.out.println("Learning starts.");
                    System.out.println("For the information about this learning see the log file "
                            + workingDirectoryResults.getAbsolutePath()
                            + File.separator
                            + ConstantParameters.FILENAMEOFLOGFILE);
                    System.out.println("The number of threads used is "
                            + learningSettings.numThreadUsed);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        // Apply the model to a bunch of documents
        if (learningMode.equals(learningModeAppl)) {
            ++endDocIdApp;
            if (endDocIdApp - startDocIdApp == maxNumApp) {
                try {
                    // first checking if the model file is available or not
                    String modelFileName =
                            workingDirectoryResults.toString() + File.separator
                            + ConstantParameters.FILENAME_OF_MODEL;
                    if (!new File(modelFileName).exists()) {
                        System.out.println("Warning: the model is not available at the moment!!");
                        return;
                    }
                    BufferedWriter outNLPFeatures = null;
                    BufferedReader inNLPFeatures = null;
                    BufferedWriter outFeatureVectors = null;
                    // EvaluationBasedOnDocs.emptyDatafile(wdResults, false);
                    if (LogService.maxVerbosityLevel > 0) {
                        System.out.println("** " + "Application mode for document from "
                                + startDocIdApp + " to " + endDocIdApp + "(not included):");
                    }
                    LogService.logMessage("** Application mode for document from "
                            + startDocIdApp + " to " + endDocIdApp
                            + "(not included):", 1);
                    isTraining = false;
                    String classTypeOriginal =
                            learningSettings.datasetDefinition.getClassAttribute().getType();
                    outNLPFeatures =
                            new BufferedWriter(
                            new OutputStreamWriter(
                            new FileOutputStream(
                            new File(
                            getApplicationTempDir(),
                            ConstantParameters.FILENAME_FEATURES_CSV)),
                            "UTF-8"));
                    int numDoc;
                    numDoc = endDocIdApp - startDocIdApp;
                    long startTime = Benchmark.startPoint();
                    benchmarkingFeatures.put("numDocs", "" + numDoc);
                    for (int i = startDocIdApp; i < endDocIdApp; ++i) {
                        Document toProcess = (Document) corpus.get(i);
                        lightWeightApi.annotations2NLPFeatures(toProcess,
                                i - startDocIdApp, outNLPFeatures, isTraining,
                                learningSettings);
                        if (toProcess.getDataStore() != null
                                && corpus.getDataStore() != null) {// (isDatastore)
                            corpus.getDataStore().sync(corpus);
                        }
                    }
                    outNLPFeatures.flush();
                    outNLPFeatures.close();
                    lightWeightApi.finishFVs(getApplicationTempDir(), numDoc, isTraining,
                            learningSettings);
                    Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                            + Benchmark.ANNOTS_TO_NLP_FEATURES, this,
                            benchmarkingFeatures);
                    startTime = Benchmark.startPoint();
                    /** Open the normal NLP feature file. */
                    inNLPFeatures =
                            new BomStrippingInputStreamReader(
                            new FileInputStream(
                            new File(
                            getApplicationTempDir(),
                            ConstantParameters.FILENAME_FEATURES_CSV)),
                            "UTF-8");
                    outFeatureVectors =
                            new BufferedWriter(
                            new OutputStreamWriter(
                            new FileOutputStream(
                            new File(
                            getApplicationTempDir(),
                            ConstantParameters.FILENAMEOFFeatureVectorDataApp)),
                            "UTF-8"));
                    lightWeightApi.nlpfeatures2FVs(getApplicationTempDir(),
                            inNLPFeatures, outFeatureVectors, numDoc, isTraining,
                            learningSettings);
                    inNLPFeatures.close();
                    outFeatureVectors.flush();
                    outFeatureVectors.close();
                    Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                            + Benchmark.NLP_FEATURES_TO_FVS, this, benchmarkingFeatures);
                    // Applying th model
                    startTime = Benchmark.startPoint();
                    lightWeightApi.applyModelInJava(corpus, startDocIdApp, endDocIdApp,
                            classTypeOriginal, learningSettings, getApplicationTempDir());
                    Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                            + Benchmark.MODEL_APPLICATION, this, benchmarkingFeatures);
                    benchmarkingFeatures.remove("numDocs");
                    startDocIdApp = endDocIdApp;
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (GateException e) {
                    e.printStackTrace();
                }
            }
        }
        // we've reached the last document
        if (positionDoc == corpus.size() - 1) {
            // first select the training data and test data according to the
            // learning setting
            // set the inputASName in here, because it is a runtime parameter
            int numDoc = corpus.size();
            try {
                LogService.init(logFile, true, learningSettings.verbosityLogService);
                LogService.logMessage("The learning start at " + new Date().toString(),
                        1);
                LogService.logMessage("The number of documents in dataset: " + numDoc,
                        1);
                // Open the NLP feature file for storing the NLP feature vectors
                BufferedWriter outNLPFeatures = null;
                BufferedReader inNLPFeatures = null;
                BufferedWriter outFeatureVectors = null;
                // if only need the feature data
                switch (learningMode) {
                    case EXTRACT_FEATURE:
                        // if only want feature data
                        EvaluationBasedOnDocs.emptyDatafile(workingDirectoryResults, true);
                        if (LogService.maxVerbosityLevel > 0) {
                            System.out.println("** Producing the feature files only!");
                        }
                        LogService.logMessage("** Producing the feature files only!", 1);
                        long startTime = Benchmark.startPoint();
                        benchmarkingFeatures.put("numDocs", numDoc);
                        isTraining = true;
                        outNLPFeatures =
                                new BufferedWriter(
                                new OutputStreamWriter(
                                new FileOutputStream(
                                new File(
                                workingDirectoryResults,
                                ConstantParameters.FILENAME_FEATURES_CSV)),
                                "UTF-8"));

                        //In the meantime, also write the name of documents and total number of them into a file
                        BufferedWriter outDocsName =
                                new BufferedWriter(new OutputStreamWriter(
                                new FileOutputStream(new File(workingDirectoryResults,
                                ConstantParameters.FILENAMEOFDocsName)),
                                "UTF-8"));
                        outDocsName.append("##totalDocs=" + numDoc);
                        outDocsName.newLine();
                        for (int i = 0; i < numDoc; ++i) {
                            Document toProcess = (Document) corpus.get(i);
                            lightWeightApi.annotations2NLPFeatures(toProcess, i,
                                    outNLPFeatures, isTraining, learningSettings);
                            String docN = toProcess.getName();
                            if (docN.contains("_")) {
                                docN = docN.substring(0, docN.lastIndexOf("_"));
                            }
                            outDocsName.append(docN);
                            outDocsName.newLine();
                            if (toProcess.getDataStore() != null
                                    && corpus.getDataStore() != null) {
                                Factory.deleteResource(toProcess);
                            }
                        }
                        outNLPFeatures.flush();
                        outNLPFeatures.close();
                        outDocsName.flush();
                        outDocsName.close();
                        lightWeightApi.finishFVs(workingDirectoryResults, numDoc, isTraining,
                                learningSettings);
                        Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                + Benchmark.ANNOTS_TO_NLP_FEATURES, this,
                                benchmarkingFeatures);
                        /** Open the normal NLP feature file. */
                        inNLPFeatures =
                                new BomStrippingInputStreamReader(
                                new FileInputStream(
                                new File(
                                workingDirectoryResults,
                                ConstantParameters.FILENAME_FEATURES_CSV)),
                                "UTF-8");
                        outFeatureVectors =
                                new BufferedWriter(
                                new OutputStreamWriter(
                                new FileOutputStream(
                                new File(
                                workingDirectoryResults,
                                ConstantParameters.FILENAMEOFFeatureVectorData)),
                                "UTF-8"));
                        startTime = Benchmark.startPoint();
                        lightWeightApi.nlpfeatures2FVs(workingDirectoryResults, inNLPFeatures,
                                outFeatureVectors, numDoc, isTraining, learningSettings);
                        inNLPFeatures.close();
                        outFeatureVectors.flush();
                        outFeatureVectors.close();
                        Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                + Benchmark.NLP_FEATURES_TO_FVS, this,
                                benchmarkingFeatures);
                        // produce the ngram language model from feature list
                        if (learningSettings.datasetDefinition.getNgrams() != null) {
                            if (LogService.maxVerbosityLevel > 0) {
                                System.out.println("Write the language model in N-grams into the file "
                                        + ConstantParameters.FILENAMEOFNgramLM + "!");
                            }
                            LogService.logMessage(
                                    "Write the language model in N-grams into the file "
                                    + ConstantParameters.FILENAMEOFNgramLM + "!", 1);
                            if (learningSettings.datasetDefinition.getNgrams().size() >= 1) {
                                startTime = Benchmark.startPoint();
                                lightWeightApi.featureList2LM(workingDirectoryResults,
                                        ((Ngram) learningSettings.datasetDefinition.getNgrams().get(0)).getNumber());
                                Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                        + Benchmark.WRITING_NGRAM_MODEL, this,
                                        benchmarkingFeatures);
                                // produce the term-frequency matrix
                                if (LogService.maxVerbosityLevel > 0) {
                                    System.out.println("Write the term-document statistics into the file "
                                            + ConstantParameters.FILENAMEOFTermFreqMatrix
                                            + "!");
                                }
                                LogService.logMessage(
                                        "Write the term-document statistics into the file "
                                        + ConstantParameters.FILENAMEOFTermFreqMatrix
                                        + "!", 1);
                                startTime = Benchmark.startPoint();
                                lightWeightApi.termfrequenceMatrix(workingDirectoryResults, numDoc);
                                Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                        + Benchmark.TERM_DOC_STATS, this, benchmarkingFeatures);
                            } else {
                                System.out.println("!! Warning: cannot produce N-gram data because there is no N-gram "
                                        + "defintion in the configuration file!");
                            }
                        }
                        benchmarkingFeatures.remove("numDocs");
                        // Create the document for storing the names of selected documents
                        // if it doesn't exist.
                        File selectedFile =
                                new File(workingDirectoryResults,
                                ConstantParameters.FILENAMEOFSelectedDOCForAL);
                        if (!selectedFile.exists()) {
                            selectedFile.createNewFile();
                        }
                        if (LogService.maxVerbosityLevel > 0) {
                            displayDataFilesInformation();
                        }
                        break;
                    case TRAINING:
                        // empty the data file
                        Long tm1,
                         tm2,
                         tm3;
                        if (LogService.DEBUG > 1) {
                            tm1 = new Date().getTime();
                        }
                        EvaluationBasedOnDocs.emptyDatafile(workingDirectoryResults, true);
                        if (LogService.maxVerbosityLevel > 0) {
                            System.out.println("** Training mode:");
                        }
                        LogService.logMessage("** Training mode:", 1);
                        startTime = Benchmark.startPoint();
                        benchmarkingFeatures.put("numDocs", "" + numDoc);
                        isTraining = true;
                        outNLPFeatures =
                                new BufferedWriter(
                                new OutputStreamWriter(
                                new FileOutputStream(
                                new File(
                                workingDirectoryResults,
                                ConstantParameters.FILENAME_FEATURES_CSV)),
                                "UTF-8"));
                        for (int i = 0; i < numDoc; ++i) {
                            Document toProcess = (Document) corpus.get(i);
                            lightWeightApi.annotations2NLPFeatures(toProcess, i,
                                    outNLPFeatures, isTraining, learningSettings);
                            if (toProcess.getDataStore() != null
                                    && corpus.getDataStore() != null) {
                                Factory.deleteResource(toProcess);
                            }
                        }
                        outNLPFeatures.flush();
                        outNLPFeatures.close();
                        lightWeightApi.finishFVs(workingDirectoryResults, numDoc, isTraining,
                                learningSettings);
                        Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                + Benchmark.ANNOTS_TO_NLP_FEATURES, this,
                                benchmarkingFeatures);
                        if (LogService.DEBUG > 1) {
                            tm2 = new Date().getTime();
                            tm3 = tm2 - tm1;
                            tm3 /= 1000;
                            System.out.println("time for NLP features: " + tm3);
                        }
                        /** Open the normal NLP feature file. */
                        inNLPFeatures =
                                new BomStrippingInputStreamReader(
                                new FileInputStream(
                                new File(
                                workingDirectoryResults,
                                ConstantParameters.FILENAME_FEATURES_CSV)),
                                "UTF-8");
                        outFeatureVectors =
                                new BufferedWriter(
                                new OutputStreamWriter(
                                new FileOutputStream(
                                new File(
                                workingDirectoryResults,
                                ConstantParameters.FILENAMEOFFeatureVectorData)),
                                "UTF-8"));
                        startTime = Benchmark.startPoint();
                        lightWeightApi.nlpfeatures2FVs(workingDirectoryResults, inNLPFeatures,
                                outFeatureVectors, numDoc, isTraining, learningSettings);
                        inNLPFeatures.close();
                        outFeatureVectors.flush();
                        outFeatureVectors.close();
                        Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                + Benchmark.NLP_FEATURES_TO_FVS, this,
                                benchmarkingFeatures);
                        if (LogService.DEBUG > 1) {
                            tm1 = new Date().getTime();
                            tm3 = tm1 - tm2;
                            tm3 /= 1000;
                            System.out.println("time for fv: " + tm3);
                        }
                        // if fitering the training data
                        if (learningSettings.fiteringTrainingData
                                && learningSettings.filteringRatio > 0.0) {
                            startTime = Benchmark.startPoint();
                            lightWeightApi.FilteringNegativeInstsInJava(corpus.size(),
                                    learningSettings);
                            Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                    + Benchmark.FILTERING, this, benchmarkingFeatures);
                        }
                        if (LogService.DEBUG > 1) {
                            tm2 = new Date().getTime();
                            tm3 = tm2 - tm1;
                            tm3 /= 1000;
                            System.out.println("time for filtering: " + tm3);
                        }
                        startTime = Benchmark.startPoint();
                        // using the java code for training
                        lightWeightApi.trainingJava(corpus.size(), learningSettings);
                        Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                + Benchmark.MODEL_TRAINING, this, benchmarkingFeatures);
                        benchmarkingFeatures.remove("numDocs");
                        if (LogService.DEBUG > 1) {
                            tm1 = new Date().getTime();
                            tm3 = tm1 - tm2;
                            tm3 /= 1000;
                            System.out.println("time for NLP training: " + tm3);
                        }
                        break;
                    case CLASSIFICATION:
                        // first checking if the model file is available or not
                        String modelFileName =
                                workingDirectoryResults.toString() + File.separator
                                + ConstantParameters.FILENAME_OF_MODEL;
                        if (!new File(modelFileName).exists()) {
                            System.out.println("Warning: the model is not available at the moment!");
                            return;
                        }
                        if (endDocIdApp > startDocIdApp) {
                            if (LogService.maxVerbosityLevel > 0) {
                                System.out.println("** "
                                        + "Application mode for document from " + startDocIdApp
                                        + " to " + endDocIdApp + "(not included):");
                            }
                            LogService.logMessage("** Application mode for document from "
                                    + startDocIdApp + " to " + endDocIdApp
                                    + "(not included):", 1);
                            isTraining = false;
                            String classTypeOriginal =
                                    learningSettings.datasetDefinition.getClassAttribute().getType();
                            outNLPFeatures =
                                    new BufferedWriter(
                                    new OutputStreamWriter(
                                    new FileOutputStream(
                                    new File(
                                    getApplicationTempDir(),
                                    ConstantParameters.FILENAME_FEATURES_CSV)),
                                    "UTF-8"));
                            numDoc = endDocIdApp - startDocIdApp;
                            benchmarkingFeatures.put("numDocs", "" + numDoc);
                            startTime = Benchmark.startPoint();
                            for (int i = startDocIdApp; i < endDocIdApp; ++i) {
                                Document toProcess = (Document) corpus.get(i);
                                lightWeightApi.annotations2NLPFeatures(toProcess, i
                                        - startDocIdApp, outNLPFeatures, isTraining,
                                        learningSettings);
                                if (toProcess.getDataStore() != null
                                        && corpus.getDataStore() != null) {// (isDatastore)
                                    //Factory.deleteResource(toProcess);
                                    corpus.getDataStore().sync(corpus);
                                }
                            }
                            outNLPFeatures.flush();
                            outNLPFeatures.close();
                            lightWeightApi.finishFVs(getApplicationTempDir(), numDoc,
                                    isTraining, learningSettings);
                            Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                    + Benchmark.ANNOTS_TO_NLP_FEATURES, this,
                                    benchmarkingFeatures);
                            /** Open the normal NLP feature file. */
                            inNLPFeatures =
                                    new BomStrippingInputStreamReader(
                                    new FileInputStream(
                                    new File(
                                    getApplicationTempDir(),
                                    ConstantParameters.FILENAME_FEATURES_CSV)),
                                    "UTF-8");
                            outFeatureVectors =
                                    new BufferedWriter(
                                    new OutputStreamWriter(
                                    new FileOutputStream(
                                    new File(
                                    getApplicationTempDir(),
                                    ConstantParameters.FILENAMEOFFeatureVectorDataApp)),
                                    "UTF-8"));
                            startTime = Benchmark.startPoint();
                            lightWeightApi.nlpfeatures2FVs(getApplicationTempDir(),
                                    inNLPFeatures, outFeatureVectors, numDoc, isTraining,
                                    learningSettings);
                            inNLPFeatures.close();
                            outFeatureVectors.flush();
                            outFeatureVectors.close();
                            Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                    + Benchmark.NLP_FEATURES_TO_FVS, this,
                                    benchmarkingFeatures);
                            // Applying the model
                            startTime = Benchmark.startPoint();
                            lightWeightApi.applyModelInJava(corpus, startDocIdApp,
                                    endDocIdApp, classTypeOriginal, learningSettings,
                                    getApplicationTempDir());
                            Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                    + Benchmark.MODEL_APPLICATION, this,
                                    benchmarkingFeatures);
                            benchmarkingFeatures.remove("numDocs");
                            // Update the datastore for the added annotations
                        }
                        break;
                    case EVALUATION:
                        if (LogService.maxVerbosityLevel > 0) {
                            System.out.println("** Evaluation mode:");
                        }
                        LogService.logMessage("** Evaluation mode:", 1);
                        evaluation =
                                new EvaluationBasedOnDocs(corpus, workingDirectoryResults, inputASName);
                        benchmarkingFeatures.put("numDocs", corpus.size());
                        startTime = Benchmark.startPoint();
                        evaluation.evaluation(learningSettings, lightWeightApi);
                        Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                + Benchmark.EVALUATION, this, benchmarkingFeatures);
                        benchmarkingFeatures.remove("numDocs");
                        break;
                    case MITRAINING:
                        if (LogService.maxVerbosityLevel > 0) {
                            System.out.println("** MITRAINING mode:");
                        }
                        LogService.logMessage("** MITRAINING mode:", 1);
                        isTraining = true;
                        benchmarkingFeatures.put("numDocs", "" + numDoc);
                        startTime = Benchmark.startPoint();
                        /**
                         * Need to write the NLP features into a temporary file, then copy
                         * it into the NLP file.
                         */
                        BufferedWriter outNLPFeaturesTemp =
                                new BufferedWriter(
                                new OutputStreamWriter(
                                new FileOutputStream(
                                new File(
                                workingDirectoryResults,
                                ConstantParameters.FILENAMEOFNLPFeaturesDataTemp)),
                                "UTF-8"));
                        for (int i = 0; i < numDoc; ++i) {
                            lightWeightApi.annotations2NLPFeatures((Document) corpus.get(i),
                                    i, outNLPFeaturesTemp, isTraining, learningSettings);
                        }
                        outNLPFeaturesTemp.flush();
                        outNLPFeaturesTemp.close();
                        lightWeightApi.finishFVs(workingDirectoryResults, numDoc, isTraining,
                                learningSettings);
                        Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                + Benchmark.ANNOTS_TO_NLP_FEATURES, this,
                                benchmarkingFeatures);
                        lightWeightApi.copyNLPFeat2NormalFile(workingDirectoryResults,
                                miLearningInfor.miNumDocsTraining);
                        /**
                         * Use the temp NLP feature file instead of the normal one for
                         * MI-training.
                         */
                        inNLPFeatures =
                                new BomStrippingInputStreamReader(
                                new FileInputStream(
                                new File(
                                workingDirectoryResults,
                                ConstantParameters.FILENAMEOFNLPFeaturesDataTemp)),
                                "UTF-8");
                        outFeatureVectors =
                                new BufferedWriter(
                                new OutputStreamWriter(
                                new FileOutputStream(
                                new File(
                                workingDirectoryResults,
                                ConstantParameters.FILENAMEOFFeatureVectorData),
                                true), "UTF-8"));
                        startTime = Benchmark.startPoint();
                        lightWeightApi.nlpfeatures2FVs(workingDirectoryResults, inNLPFeatures,
                                outFeatureVectors, numDoc, isTraining, learningSettings);
                        inNLPFeatures.close();
                        outFeatureVectors.flush();
                        outFeatureVectors.close();
                        Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                + Benchmark.NLP_FEATURES_TO_FVS, this,
                                benchmarkingFeatures);
                        System.gc(); // to make effort to delete the files.
                        miLearningInfor.miNumDocsTraining += numDoc;
                        miLearningInfor.miNumDocsFromLast += numDoc;
                        if (miLearningInfor.miNumDocsFromLast >= learningSettings.miDocInterval) {
                            // Start learning
                            // if filtering the training data
                            if (learningSettings.fiteringTrainingData
                                    && learningSettings.filteringRatio > 0.0) {
                                benchmarkingFeatures.put("numDocs",
                                        miLearningInfor.miNumDocsTraining + "");
                                startTime = Benchmark.startPoint();
                                lightWeightApi.FilteringNegativeInstsInJava(
                                        miLearningInfor.miNumDocsTraining, learningSettings);
                                Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                                        + Benchmark.FILTERING, this, benchmarkingFeatures);
                            }
                            startTime = Benchmark.startPoint();
                            // using the java code for training
                            lightWeightApi.trainingJava(miLearningInfor.miNumDocsTraining,
                                    learningSettings);
                            Benchmark.checkPoint(startTime, getBenchmarkId() + "." + "."
                                    + Benchmark.MODEL_TRAINING, this, benchmarkingFeatures);
                            benchmarkingFeatures.remove("numDocs");
                            // Reset the num from last training as 0
                            miLearningInfor.miNumDocsFromLast = 0;
                        }
                        File miLeFile =
                                new File(workingDirectoryResults,
                                ConstantParameters.FILENAMEOFMILearningInfor);
                        miLearningInfor.writeDataIntoFile(miLeFile);
                        break;
                    default:
                        throw new GateException("The learning mode is not defined!");
                }
                LogService.logMessage("This learning session finished!.", 1);
                // LogService.close();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (GateException e) {
                e.printStackTrace();
            }
            // reset the parentBenchmarkID
            if (oldLightWeightApiParentId != null) {
                lightWeightApi.setParentBenchmarkId(oldLightWeightApiParentId);
            }
            if (LogService.maxVerbosityLevel > 0) {
                System.out.println("This learning session finished!");
            }
        } // end of learning (position=corpus.size()-1)
    }

    /** Print out the information for featureData only option. */
    private void displayDataFilesInformation() {
        StringBuffer logMessage = new StringBuffer();
        logMessage.append("NLP features for all the documents are in the file"
                + workingDirectoryResults.getAbsolutePath() + File.separator
                + ConstantParameters.FILENAME_FEATURES_CSV + "\n");
        logMessage.append("Feature vectors in sparse format are in the file"
                + workingDirectoryResults.getAbsolutePath() + File.separator
                + ConstantParameters.FILENAMEOFFeatureVectorData + "\n");
        logMessage.append("Label list is in the file" + workingDirectoryResults.getAbsolutePath()
                + File.separator + ConstantParameters.FILENAMEOFLabelList + "\n");
        logMessage.append("NLP features list is in the file"
                + workingDirectoryResults.getAbsolutePath() + File.separator
                + ConstantParameters.FILENAMEOFNLPFeatureList + "\n");
        logMessage.append("The statistics of entity length for each class is in the file"
                + workingDirectoryResults.getAbsolutePath()
                + File.separator
                + ConstantParameters.FILENAMEOFChunkLenStats + "\n");
        System.out.println(logMessage.toString());
        LogService.logMessage(logMessage.toString(), 1);
    }

    /**
     * Determine the directory used to store temporary files when running in
     * APPLICATION mode.
     */
    /**
     * Determine the directory used to store temporary files when running in
     * APPLICATION mode. The directory is created lazily under
     * {@code workingDirectoryResults}; if it cannot be created, the results
     * directory itself is used as a fallback.
     *
     * @return the directory for application-mode temp files, never null
     */
    protected File getApplicationTempDir() {
        if (applicationTempDir == null) {
            LogService.logMessage(
                    "Creating temp directory for application-mode files", 1);
            try {
                // createTempFile yields a unique name; the file is then
                // replaced by a directory of the same name.
                applicationTempDir = File.createTempFile("appl", ".tmp", workingDirectoryResults);
                // Check the result of delete() as well as mkdir(): a failed
                // delete() was previously ignored and would only surface as a
                // confusing mkdir() failure afterwards.
                if (!applicationTempDir.delete() || !applicationTempDir.mkdir()) {
                    throw new IOException(
                            "Error creating directory " + applicationTempDir);
                }
            } catch (IOException ioe) {
                LogService.logMessage("Could not create temporary directory for "
                        + "application-mode temp files, using " + workingDirectoryResults, 1);
                applicationTempDir = workingDirectoryResults;
            }
        }
        return applicationTempDir;
    }

    /**
     * Delete the temporary directory for application-mode temp files when this
     * resource is deleted.
     */
    /**
     * Delete the temporary directory for application-mode temp files when this
     * resource is deleted.
     */
    @Override
    public void cleanup() {
        if (applicationTempDir != null && !applicationTempDir.equals(workingDirectoryResults)) {
            deleteRecursively(applicationTempDir);
            // Reset so a repeated cleanup does not attempt a second delete.
            applicationTempDir = null;
        }
        // Give the superclass a chance to release its own resources too;
        // the original override dropped this call.
        super.cleanup();
    }

    /**
     * Delete a file or directory. If the argument is a directory, delete its
     * contents first, then remove the directory itself.
     */
    /**
     * Delete a file or directory. If the argument is a directory, delete its
     * contents first, then remove the directory itself. Failures are logged
     * rather than thrown, since this is best-effort cleanup.
     *
     * @param fileOrDir the file or directory to delete
     */
    private void deleteRecursively(File fileOrDir) {
        if (fileOrDir.isDirectory()) {
            // listFiles() returns null on an I/O error or if the directory
            // disappears concurrently - guard against the resulting NPE.
            File[] children = fileOrDir.listFiles();
            if (children != null) {
                for (File f : children) {
                    deleteRecursively(f);
                }
            }
        }
        if (!fileOrDir.delete()) {
            LogService.logMessage("Couldn't delete "
                    + (fileOrDir.isDirectory() ? "directory " : "file ") + fileOrDir,
                    1);
        }
    }

    /** Sets the URL of the learning configuration file. */
    public void setConfigFileURL(URL newConfigFileURL) {
        configFileURL = newConfigFileURL;
    }

    /** Returns the URL of the learning configuration file. */
    public URL getConfigFileURL() {
        return configFileURL;
    }

    /** Sets the name of the annotation set read as input. */
    public void setInputASName(String name) {
        inputASName = name;
    }

    /** Returns the name of the annotation set read as input. */
    public String getInputASName() {
        return inputASName;
    }

    /** Sets the name of the annotation set receiving result annotations. */
    public void setOutputASName(String name) {
        outputASName = name;
    }

    /** Returns the name of the annotation set receiving result annotations. */
    public String getOutputASName() {
        return outputASName;
    }

    /** Returns the current run-time learning mode. */
    public RunMode getLearningMode() {
        return learningMode;
    }

    /** Sets the run-time learning mode. */
    public void setLearningMode(RunMode mode) {
        learningMode = mode;
    }

    /** Returns the document-based evaluation object, if one has been created. */
    public EvaluationBasedOnDocs getEvaluation() {
        return this.evaluation;
    }

    /**
     * Stores the given evaluation object and, for chaining, returns the value
     * just stored.
     */
    public EvaluationBasedOnDocs setEvaluation(EvaluationBasedOnDocs eval) {
        this.evaluation = eval;
        return this.evaluation;
    }
    // /////// Benchmarkable ////////////////
    private String parentBenchmarkID;
    private String benchmarkID;

    /**
     * Returns the benchmark ID of the parent of this resource.
     *
     * @return
     */
    /**
     * Returns the benchmark ID of the parent of this resource.
     *
     * @return the parent's benchmark ID, or null if none has been set
     */
    public String getParentBenchmarkId() {
        return parentBenchmarkID;
    }

    /**
     * Returns the benchmark ID of this resource.
     *
     * @return
     */
    /**
     * Returns the benchmark ID of this resource, deriving it lazily from the
     * resource name if it has not been set explicitly.
     *
     * @return the benchmark ID of this resource
     */
    public String getBenchmarkId() {
        if (benchmarkID == null) {
            // Benchmark IDs must not contain spaces: collapse each run of
            // spaces in the resource name into a single underscore.
            benchmarkID = getName().replaceAll(" +", "_");
        }
        return benchmarkID;
    }

    /**
     * Given an ID of the parent resource, this method is responsible for
     * producing the Benchmark ID, unique to this resource.
     *
     * @param parentID
     */
    /**
     * Given the ID of the parent resource, produces the benchmark ID unique to
     * this resource and records the parent ID.
     *
     * @param parentID benchmark ID of the parent resource
     */
    public void createBenchmarkId(String parentID) {
        this.parentBenchmarkID = parentID;
        this.benchmarkID = Benchmark.createBenchmarkId(getName(), parentID);
    }

    /**
     * This method sets the benchmarkID for this resource.
     *
     * @param benchmarkID
     */
    /**
     * Sets the benchmark ID of this resource's parent.
     *
     * @param parentId the parent's benchmark ID
     */
    public void setParentBenchmarkId(String parentId) {
        this.parentBenchmarkID = parentId;
    }

    /**
     * Returns the logger object being used by this resource.
     *
     * @return
     */
    /**
     * Returns the logger object being used by this resource.
     *
     * @return the shared logger held by {@link Benchmark}
     */
    public Logger getLogger() {
        return Benchmark.logger;
    }

    /**
     * Required by the {@code Benchmarkable} interface. Appears to be an
     * intentional no-op (marked "stub"), since the benchmark ID is instead
     * derived lazily in {@link #getBenchmarkId()} — NOTE(review): confirm
     * callers never rely on this setter taking effect.
     *
     * @param arg0 ignored
     */
    public void setBenchmarkId(String arg0) {
        // stub
    }
}
